results_dir <- "results/final"
figures_dir <- "figures/final"
sourcedata_dir <- "source_data/final"
dir.create(results_dir, recursive = TRUE)
dir.create(figures_dir, recursive = TRUE)
dir.create(sourcedata_dir, recursive = TRUE)

1 Package

library(SingleCellExperiment)
library(dplyr)
library(ggplot2)
library(ggthemes)
library(ggpubr)
library(moon) # Yingxin's personal package
library(pheatmap)
library(reshape2)
library(gridExtra)
library(RColorBrewer)
library(UpSetR)
library(scattermore)
library(scater)
library(scran)
library(ggridges)
library(rcartocolor)
library(Rtsne)
library(ggalluvial)
library(ggrepel)
library(BiocParallel)
library(BiocSingular)
library(BiocNeighbors)
library(openxlsx)
library(SparseArray)
ggplot2::theme_set(theme_bw() + theme_yx() + 
                     theme(axis.text.y = element_text(color = "black"),
                           axis.text.x = element_text(color = "black")) )
rdbu <- colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100)
reds <- colorRampPalette(c("white", brewer.pal(n = 7, name = "Reds")))(100)
library(fgsea)
runFGSEA <- function(stats, pathways, scoreType = "std") {
  fgseaRes <- fgsea(pathways = pathways,
                    stats    = sort(stats),
                    minSize = 5,
                    maxSize = 10000,
                    nproc = 1,
                    scoreType = scoreType)
  fgseaRes[order(fgseaRes$ES, decreasing = TRUE),]
}
library(org.Hs.eg.db)
library(clusterProfiler)
runGO <- function(gene_set, background, maxGSSize = 500) {
  eg <- bitr(gene_set,
             fromType = "SYMBOL", 
             toType = "ENTREZID", 
             OrgDb = "org.Hs.eg.db")
  
  geneList <- bitr(background,
                   fromType = "SYMBOL", 
                   toType = "ENTREZID", 
                   OrgDb = "org.Hs.eg.db")
  
  ego_res <- enrichGO(gene = eg$ENTREZID,
                      universe = geneList$ENTREZID,
                      OrgDb = org.Hs.eg.db,
                      ont = "BP",
                      pAdjustMethod = "BH",
                      minGSSize = 10,
                      maxGSSize = maxGSSize,
                      pvalueCutoff = 1,
                      qvalueCutoff = 1,
                      readable = TRUE)
  return(ego_res)
}


library(GO.db)
library(biomaRt)
library(org.Hs.eg.db)
retrieved <- AnnotationDbi::select(org.Hs.eg.db, keytype="GOALL", keys="GO:0034976", columns="SYMBOL")
#GO:0034976
#response to endoplasmic reticulum stress
#GO: 0140467
retrieved <- AnnotationDbi::select(org.Hs.eg.db, keytype="GOALL", keys="GO:0140467", columns="SYMBOL")
sce_melanoma_all <- readRDS("source_data/final/sce_melanoma.rds")
colnames(sce_melanoma_all) <- sce_melanoma_all$Barcode
# sce_melanoma <- readRDS("data/sce_melanoma.rds")
# colnames(sce_melanoma) <- sce_melanoma$Barcode

patient_color <- tableau_color_pal()(10)
names(patient_color) <- names(table(sce_melanoma_all$Patient_publish))

sample_color <- tableau_color_pal("Tableau 20")(20)
names(sample_color) <- names(table(sce_melanoma_all$Sample_publish))
numbat_classify <- readRDS("results/numbat_tumour_classification.rds")
table(numbat_classify, sce_melanoma_all$scClassify_celltype)
##                
## numbat_classify B Cells Endothelial Cells Fibroblasts intermediate Macrophages
##          normal    9616                12        1911          971        5028
##          tumor       31                 2         945          830         176
##                
## numbat_classify Monocyte NK Cells Plasma Cells T cells Tumor cells unassigned
##          normal     1549     1084          203   28437        2171        230
##          tumor       139        7           49     166      114848        194
df_toPlot <- moon::makeMoonDF(sce_melanoma_all)
g1 <- ggplot(df_toPlot,
             aes(x = UMAP1, y = UMAP2, color = scClassify_celltype)) +
  geom_scattermore(pointsize = 1) +
  theme(aspect.ratio = 1)  +
  scale_color_manual(values = RColorBrewer::brewer.pal(11, "Paired")) +
  labs(color = "Cell types")

g2 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = numbat_classify)) +
  geom_scattermore(pointsize = 1) +
  theme(aspect.ratio = 1)  +
  scale_color_manual(values = RColorBrewer::brewer.pal(10, "Paired")[c(9, 10)]) +
  labs(color = "Numbat classification")

g3 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = paste(numbat_classify == "tumor", scClassify_celltype == "Tumor cells"))) +
  geom_scattermore(pointsize = 1) +
  theme(aspect.ratio = 1)  +
  scale_color_manual(values = RColorBrewer::brewer.pal(10, "Paired")[c(1, 2, 9, 10)]) +
  labs(color = "Numbat|scClassify")

g4 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = ifelse(numbat_classify == "tumor" & scClassify_celltype == "Tumor cells", 
                                                                 "Tumor cells", "Others"))) +
  geom_scattermore(pointsize = 1) +
  theme(aspect.ratio = 1)  +
  scale_color_manual(values = RColorBrewer::brewer.pal(10, "Paired")[c(9, 10)]) +
  labs(color = "Final")
ggarrange(g1, g2, g3, g4, ncol = 2, nrow = 2, align = "hv")

Subset the cells to melanoma cells based on both scClassify and numbat notation

sce_melanoma <- sce_melanoma_all[, sce_melanoma_all$scClassify_celltype == "Tumor cells" & numbat_classify == "tumor"]
sce_melanoma
## class: SingleCellExperiment 
## dim: 30986 114848 
## metadata(1): Samples
## assays(2): counts logcounts
## rownames(30986): FAM138A OR4F5 ... AC213203.1 FAM231C
## rowData names(3): ID Symbol Type
## colnames(114848): AAACCCAAGAAACTGT-1 AAACGAAAGGGAGTTC-1 ...
##   TTTGTTGGTTGACGGA-20 TTTGTTGTCGGTGTTA-20
## colData names(19): Sample Barcode ... Patient_publish Sample_publish
## reducedDimNames(2): PCA UMAP
## mainExpName: NULL
## altExpNames(0):
sce_melanoma_patient <- sce_melanoma[, sce_melanoma$Condition == "DMSO"]
melanoma_markers <- readxl::read_xlsx("data/mmc4-2.xlsx", skip = 1)
melanoma_markers <- split(melanoma_markers$Gene, melanoma_markers$Signature)

mean_trim_low <- function(x, trim = 0) {
  mean(x[x >= quantile(x, trim)])
}

gene_scores <- lapply(melanoma_markers, function(x) {
  apply(assay(sce_melanoma_patient, "logcounts")[intersect(x, rownames(sce_melanoma_patient)), ], 2, 
        mean_trim_low, trim = 0.1)
})


IFNg <- readxl::read_xlsx("data/Hallmark_IFNg.xlsx", skip = 1)
IFNg <- unlist(c(IFNg[, 1]))
IFNg <- intersect(IFNg, rownames(sce_melanoma_patient))
IFNg_scores <- apply(logcounts(sce_melanoma_patient)[IFNg, ], 2, mean_trim_low, trim = 0.1)
Nazarian_markers <- read.csv("data/Nazarian_mapk_signature.csv", header = FALSE)
Nazarian_markers <- intersect(Nazarian_markers$V1, rownames(sce_melanoma_patient))
Nazarian_scores <- apply(logcounts(sce_melanoma_patient)[Nazarian_markers, ], 2, mean_trim_low, trim = 0.1)
cc_genes <- lapply(Seurat::cc.genes.updated.2019, function(x) intersect(x, rownames(sce_melanoma_patient)))
cc_s_scores <- apply(logcounts(sce_melanoma_patient)[cc_genes$s.genes, ], 2, mean_trim_low, trim = 0.1)
cc_g2m_scores <- apply(logcounts(sce_melanoma_patient)[cc_genes$g2m.genes, ], 2, mean_trim_low, trim = 0.1)
hallmarkList <- readRDS("data/hallmarkList.rds")
hallmarkList <- lapply(hallmarkList, function(x) intersect(x, rownames(sce_melanoma_patient)))
hallmark_scores <- lapply(hallmarkList, function(x) {
  colMeans(logcounts(sce_melanoma_patient)[intersect(x, rownames(sce_melanoma_patient)), ])
})

2 QC on melanoma cells

hvg_fit <- scran::modelGeneVar(sce_melanoma_patient, block = sce_melanoma_patient$Sample)
hvg <- scran::getTopHVGs(hvg_fit, n = 500)


set.seed(2022)
sce_melanoma_patient <- runPCA(sce_melanoma_patient, ncomponents = 20, 
                               subset_row = hvg, BPPARAM = SerialParam(), BSPARAM = RandomParam())
set.seed(2022)
sce_melanoma_patient <- runUMAP(sce_melanoma_patient, dimred = "PCA", min_dist = 0.3, verbose = TRUE)



df_toPlot <- moon::makeMoonDF(sce_melanoma_patient)
nUMI <- colSums(counts(sce_melanoma_patient))

nGene <- colSums(counts(sce_melanoma_patient) != 0)

RPLprop <- colSums(counts(sce_melanoma_patient)[grep("RPL|RPS", rownames(sce_melanoma_patient)),])/colSums(counts(sce_melanoma_patient))
remove_nUMI <- nUMI < 7000 | nGene < 2000
df_toPlot <- moon::makeMoonDF(sce_melanoma_patient)
table(cc_g2m_scores > 1)
## 
## FALSE  TRUE 
## 56341  8034
funMixModel <- function(x, mu1, mu2, sd1, sd2, rho1, rho2) {
  
  dnorm(x, mean = mu1, sd = sd1) * rho1 -
    dnorm(x, mean = mu2, sd = sd2) * rho2
  
  
}




# Function to get the threshold for correlation based on mixture model

getThreshold <- function(mixmdl, verbose = FALSE){
  
  # if (verbose) {
  #   plot(mixmdl, which = 2)
  # }
  
  membership <- apply(mixmdl$posterior, 1, which.max)
  m_list <- sort(unique(membership))
  
  mu_list <- mixmdl$mu
  names(mu_list) <- seq_len(length(mu_list))
  mu_list <- mu_list[m_list]
  
  if (length(mu_list) > 1) {
    idx1 <- as.numeric(names(mu_list)[order(mu_list)][1])
    idx2 <- as.numeric(names(mu_list)[order(mu_list)][2])
    
    root <- try(uniroot(funMixModel, interval = c(mixmdl$mu[idx1] -
                                                    mixmdl$sigma[idx1],
                                                  mixmdl$mu[idx2] +
                                                    mixmdl$sigma[idx2]),
                        mu1 = mixmdl$mu[idx1], mu2 = mixmdl$mu[idx2],
                        sd1 = mixmdl$sigma[idx1], sd2 = mixmdl$sigma[idx2],
                        rho1 = mixmdl$lambda[idx1],
                        rho2 = mixmdl$lambda[idx2]),
                silent = TRUE)
    
    
    if (!"try-error" %in%  is(root)) {
      t <- root$root
    }else{
      t <- 0
    }
    
  }else{
    t <- 0
  }
  
  return(t)
}
set.seed(2022)
mixmdl_g2m <- try(mixtools::normalmixEM(cc_g2m_scores,
                                        fast = TRUE,
                                        maxrestarts = 100,
                                        k = 2,
                                        maxit = 1000,
                                        # mu = c(-0.5, rep(0.5, length_unique_y - 2),  1),
                                        # lambda = c(1/2),
                                        # sigma = rep(0.2, length_unique_y),
                                        ECM = TRUE,
                                        verb = TRUE),
                  silent = TRUE)
## iteration = 1  log-lik diff = 59569.36  log-lik = -19674.84 
## iteration = 2  log-lik diff = 21198.26  log-lik = 1523.421 
## iteration = 3  log-lik diff = 1418.905  log-lik = 2942.326 
## iteration = 4  log-lik diff = 130.5359  log-lik = 3072.861 
## iteration = 5  log-lik diff = 18.09592  log-lik = 3090.957 
## iteration = 6  log-lik diff = 5.008508  log-lik = 3095.966 
## iteration = 7  log-lik diff = 1.895995  log-lik = 3097.862 
## iteration = 8  log-lik diff = 0.7641971  log-lik = 3098.626 
## iteration = 9  log-lik diff = 0.3112101  log-lik = 3098.937 
## iteration = 10  log-lik diff = 0.1269754  log-lik = 3099.064 
## iteration = 11  log-lik diff = 0.05183133  log-lik = 3099.116 
## iteration = 12  log-lik diff = 0.02116152  log-lik = 3099.137 
## iteration = 13  log-lik diff = 0.008640622  log-lik = 3099.146 
## iteration = 14  log-lik diff = 0.003528332  log-lik = 3099.149 
## iteration = 15  log-lik diff = 0.001440822  log-lik = 3099.151 
## iteration = 16  log-lik diff = 0.0005883851  log-lik = 3099.151 
## iteration = 17  log-lik diff = 0.0002402811  log-lik = 3099.152 
## iteration = 18  log-lik diff = 9.812552e-05  log-lik = 3099.152 
## iteration = 19  log-lik diff = 4.007255e-05  log-lik = 3099.152 
## iteration = 20  log-lik diff = 1.636492e-05  log-lik = 3099.152 
## iteration = 21  log-lik diff = 6.683149e-06  log-lik = 3099.152 
## iteration = 22  log-lik diff = 2.729299e-06  log-lik = 3099.152 
## iteration = 23  log-lik diff = 1.114597e-06  log-lik = 3099.152 
## iteration = 24  log-lik diff = 4.551912e-07  log-lik = 3099.152 
## iteration = 25  log-lik diff = 1.858862e-07  log-lik = 3099.152 
## iteration = 26  log-lik diff = 7.591734e-08  log-lik = 3099.152 
## iteration = 27  log-lik diff = 3.100286e-08  log-lik = 3099.152 
## iteration = 28  log-lik diff = 1.266017e-08  log-lik = 3099.152 
## iteration = 29  log-lik diff = 5.16593e-09  log-lik = 3099.152 
## number of iterations= 29
mixmdl_g2m_threshold <- getThreshold(mixmdl_g2m)
set.seed(2022)
mixmdl_s <- try(mixtools::normalmixEM(cc_s_scores,
                                      fast = TRUE,
                                      maxrestarts = 100,
                                      k = 2,
                                      maxit = 1000,
                                      # mu = c(-0.5, rep(0.5, length_unique_y - 2),  1),
                                      # lambda = c(1/2),
                                      # sigma = rep(0.2, length_unique_y),
                                      ECM = TRUE,
                                      verb = TRUE),
                silent = TRUE)
## iteration = 1  log-lik diff = 53419.31  log-lik = -13111.65 
## iteration = 2  log-lik diff = 20984.7  log-lik = 7873.05 
## iteration = 3  log-lik diff = 1981.623  log-lik = 9854.673 
## iteration = 4  log-lik diff = 394.7357  log-lik = 10249.41 
## iteration = 5  log-lik diff = 75.20056  log-lik = 10324.61 
## iteration = 6  log-lik diff = 18.8214  log-lik = 10343.43 
## iteration = 7  log-lik diff = 7.180167  log-lik = 10350.61 
## iteration = 8  log-lik diff = 3.433998  log-lik = 10354.04 
## iteration = 9  log-lik diff = 1.760211  log-lik = 10355.8 
## iteration = 10  log-lik diff = 0.9182721  log-lik = 10356.72 
## iteration = 11  log-lik diff = 0.4812294  log-lik = 10357.2 
## iteration = 12  log-lik diff = 0.2525403  log-lik = 10357.46 
## iteration = 13  log-lik diff = 0.1326015  log-lik = 10357.59 
## iteration = 14  log-lik diff = 0.06964569  log-lik = 10357.66 
## iteration = 15  log-lik diff = 0.03658657  log-lik = 10357.7 
## iteration = 16  log-lik diff = 0.01922231  log-lik = 10357.71 
## iteration = 17  log-lik diff = 0.0101002  log-lik = 10357.73 
## iteration = 18  log-lik diff = 0.005307422  log-lik = 10357.73 
## iteration = 19  log-lik diff = 0.002789063  log-lik = 10357.73 
## iteration = 20  log-lik diff = 0.001465711  log-lik = 10357.73 
## iteration = 21  log-lik diff = 0.0007702814  log-lik = 10357.74 
## iteration = 22  log-lik diff = 0.0004048169  log-lik = 10357.74 
## iteration = 23  log-lik diff = 0.000212752  log-lik = 10357.74 
## iteration = 24  log-lik diff = 0.0001118132  log-lik = 10357.74 
## iteration = 25  log-lik diff = 5.876455e-05  log-lik = 10357.74 
## iteration = 26  log-lik diff = 3.088445e-05  log-lik = 10357.74 
## iteration = 27  log-lik diff = 1.623177e-05  log-lik = 10357.74 
## iteration = 28  log-lik diff = 8.530886e-06  log-lik = 10357.74 
## iteration = 29  log-lik diff = 4.483532e-06  log-lik = 10357.74 
## iteration = 30  log-lik diff = 2.356421e-06  log-lik = 10357.74 
## iteration = 31  log-lik diff = 1.238455e-06  log-lik = 10357.74 
## iteration = 32  log-lik diff = 6.508781e-07  log-lik = 10357.74 
## iteration = 33  log-lik diff = 3.42101e-07  log-lik = 10357.74 
## iteration = 34  log-lik diff = 1.797889e-07  log-lik = 10357.74 
## iteration = 35  log-lik diff = 9.450014e-08  log-lik = 10357.74 
## iteration = 36  log-lik diff = 4.965113e-08  log-lik = 10357.74 
## iteration = 37  log-lik diff = 2.610614e-08  log-lik = 10357.74 
## iteration = 38  log-lik diff = 1.372246e-08  log-lik = 10357.74 
## iteration = 39  log-lik diff = 7.203198e-09  log-lik = 10357.74 
## number of iterations= 39
mixmdl_s_threshold <- getThreshold(mixmdl_s)
set.seed(2022)
cc_scores <- cc_s_scores + cc_g2m_scores

mixmdl_cc <- try(mixtools::normalmixEM(cc_scores,
                                       fast = TRUE,
                                       maxrestarts = 100,
                                       k = 2,
                                       maxit = 1000,
                                       # mu = c(-0.5, rep(0.5, length_unique_y - 2),  1),
                                       # lambda = c(1/2),
                                       # sigma = rep(0.2, length_unique_y),
                                       ECM = TRUE,
                                       verb = TRUE),
                 silent = TRUE)
## iteration = 1  log-lik diff = 54107.81  log-lik = -56252.45 
## iteration = 2  log-lik diff = 22788.71  log-lik = -33463.73 
## iteration = 3  log-lik diff = 1935.718  log-lik = -31528.01 
## iteration = 4  log-lik diff = 238.6751  log-lik = -31289.34 
## iteration = 5  log-lik diff = 25.48529  log-lik = -31263.85 
## iteration = 6  log-lik diff = 3.953839  log-lik = -31259.9 
## iteration = 7  log-lik diff = 1.354638  log-lik = -31258.55 
## iteration = 8  log-lik diff = 0.674628  log-lik = -31257.87 
## iteration = 9  log-lik diff = 0.3616927  log-lik = -31257.51 
## iteration = 10  log-lik diff = 0.1961173  log-lik = -31257.31 
## iteration = 11  log-lik diff = 0.1065305  log-lik = -31257.21 
## iteration = 12  log-lik diff = 0.05788899  log-lik = -31257.15 
## iteration = 13  log-lik diff = 0.03146127  log-lik = -31257.12 
## iteration = 14  log-lik diff = 0.01709973  log-lik = -31257.1 
## iteration = 15  log-lik diff = 0.009294462  log-lik = -31257.09 
## iteration = 16  log-lik diff = 0.005052139  log-lik = -31257.09 
## iteration = 17  log-lik diff = 0.002746237  log-lik = -31257.08 
## iteration = 18  log-lik diff = 0.001492826  log-lik = -31257.08 
## iteration = 19  log-lik diff = 0.0008114962  log-lik = -31257.08 
## iteration = 20  log-lik diff = 0.0004411319  log-lik = -31257.08 
## iteration = 21  log-lik diff = 0.0002398025  log-lik = -31257.08 
## iteration = 22  log-lik diff = 0.0001303592  log-lik = -31257.08 
## iteration = 23  log-lik diff = 7.086492e-05  log-lik = -31257.08 
## iteration = 24  log-lik diff = 3.852319e-05  log-lik = -31257.08 
## iteration = 25  log-lik diff = 2.094182e-05  log-lik = -31257.08 
## iteration = 26  log-lik diff = 1.138433e-05  log-lik = -31257.08 
## iteration = 27  log-lik diff = 6.188708e-06  log-lik = -31257.08 
## iteration = 28  log-lik diff = 3.364294e-06  log-lik = -31257.08 
## iteration = 29  log-lik diff = 1.828896e-06  log-lik = -31257.08 
## iteration = 30  log-lik diff = 9.942196e-07  log-lik = -31257.08 
## iteration = 31  log-lik diff = 5.404763e-07  log-lik = -31257.08 
## iteration = 32  log-lik diff = 2.938104e-07  log-lik = -31257.08 
## iteration = 33  log-lik diff = 1.597182e-07  log-lik = -31257.08 
## iteration = 34  log-lik diff = 8.683128e-08  log-lik = -31257.08 
## iteration = 35  log-lik diff = 4.720641e-08  log-lik = -31257.08 
## iteration = 36  log-lik diff = 2.565866e-08  log-lik = -31257.08 
## iteration = 37  log-lik diff = 1.394801e-08  log-lik = -31257.08 
## iteration = 38  log-lik diff = 7.585186e-09  log-lik = -31257.08 
## number of iterations= 38
mixmdl_cc_threshold <- getThreshold(mixmdl_cc)
mixmdl_g2m_threshold
## [1] 0.5013625
mixmdl_s_threshold
## [1] 0.4217393
cell_cycle_cells <- cc_g2m_scores > mixmdl_g2m_threshold | cc_s_scores > mixmdl_s_threshold 

table(cell_cycle_cells)
## cell_cycle_cells
## FALSE  TRUE 
## 47587 16788
inflammatroy_cells <- hallmark_scores$HALLMARK_INFLAMMATORY_RESPONSE > 0.4
table(cell_cycle_cells)
## cell_cycle_cells
## FALSE  TRUE 
## 47587 16788
table(inflammatroy_cells)
## inflammatroy_cells
## FALSE  TRUE 
## 62646  1729
table(remove_nUMI)
## remove_nUMI
## FALSE  TRUE 
## 50353 14022

Remove the cells that are in cell cycle, inflamatory and with low UMI

pure_cells <- !cell_cycle_cells & !inflammatroy_cells & !remove_nUMI
table(pure_cells)
## pure_cells
## FALSE  TRUE 
## 30807 33568
ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = pure_cells)) +
  geom_scattermore(pointsize = 1) +
  theme(aspect.ratio = 1) +
  scale_color_viridis_d()

cc_prop <- table(cell_cycle_cells, sce_melanoma_patient$Sample)
round(apply(cc_prop, 2, function(x) x/sum(x)), 2)
##                 
## cell_cycle_cells SCC16_0069_B230216_DMSO SCC16_0500_B101017_DMSO
##            FALSE                    0.87                     0.3
##            TRUE                     0.13                     0.7
##                 
## cell_cycle_cells SCC20_0352_B291020_DMSO SMU09_0157_B110419_DMSO
##            FALSE                    0.97                    0.92
##            TRUE                     0.03                    0.08
##                 
## cell_cycle_cells SMU17_0075_B070921_DMSO SMU17_0132_B180418_DMSO
##            FALSE                    0.75                    0.72
##            TRUE                     0.25                    0.28
##                 
## cell_cycle_cells SMU17_0209_B130617_DMSO SMU18_0017_B110518_DMSO
##            FALSE                     0.6                    0.79
##            TRUE                      0.4                    0.21
##                 
## cell_cycle_cells SMU18_0283_B090119_DMSO SMU19_0306_B290620_DMSO
##            FALSE                    0.87                    0.74
##            TRUE                     0.13                    0.26
sce_melanoma_patient_pure <- sce_melanoma_patient[, pure_cells]
sce_melanoma_patient_pure
## class: SingleCellExperiment 
## dim: 30986 33568 
## metadata(1): Samples
## assays(2): counts logcounts
## rownames(30986): FAM138A OR4F5 ... AC213203.1 FAM231C
## rowData names(3): ID Symbol Type
## colnames(33568): AAACGAAAGGGAGTTC-1 AAACGCTGTTTCGTAG-1 ...
##   TTTGTTGGTGGCTTGC-19 TTTGTTGTCGGTTCAA-19
## colData names(19): Sample Barcode ... Patient_publish Sample_publish
## reducedDimNames(2): PCA UMAP
## mainExpName: NULL
## altExpNames(0):

3 scMerge2 on DMSO tumour cells

library(inline)
openblas.set.num.threads <- cfunction( signature(ipt="integer"),
                                       body = 'openblas_set_num_threads(*ipt);',
                                       otherdefs = c ('extern void openblas_set_num_threads(int);'),
                                       libargs = c ('/usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3'),
                                       language = "C",
                                       convention = ".C"
)
openblas.set.num.threads(1)
## $ipt
## [1] 1
library(HDF5Array)
library(rhdf5)

library(scMerge)

data(segList)
batch_label <- sce_melanoma_patient_pure$Patient
ctl_genes <- intersect(rownames(sce_melanoma_patient_pure), segList$human$human_scSEG)
ncores <- 10

hvg <- intersect(unlist(melanoma_markers), rownames(sce_melanoma_patient_pure))
hvg <- setdiff(hvg, c("GDF15", "LGALS3"))
start <- Sys.time()
scMerge_res <- scMerge2(exprsMat = logcounts(sce_melanoma_patient_pure),
                        batch = batch_label,
                        cellTypes = NULL,
                        condition = NULL,
                        use_bpparam = BiocParallel::MulticoreParam(workers = ncores),
                        use_bsparam = BiocSingular::RandomParam(),
                        use_bnparam = BiocNeighbors::AnnoyParam(),
                        ruvK = 20,
                        ctl = ctl_genes,
                        exprsMat_counts = NULL,
                        k_celltype = 10,
                        chosen.hvg = hvg,
                        cosineNorm = FALSE,
                        return_subset = FALSE,
                        seed = 1)
## [1] "Cluster within batch"
## [1] "Constructing pseudo-bulk"
## Dimension of pseudo-bulk expression: [1] 30986  1678
## [1] "Identifying MNC using pseudo-bulk:"

## [1] "Running RUV"
end <- Sys.time()

scMerge_res$newY[scMerge_res$newY < 0.001] <- 0
#scMerge_res$newY <- as(scMerge_res$newY, "dgCMatrix")

assay(sce_melanoma_patient_pure, "scMerge") <- scMerge_res$newY

saveRDS(scMerge_res, file = file.path(sourcedata_dir, "scMerge_res_melanoma_tumor_pure.rds"))

filter <- grepl("RPL|RPS|^MT-|ERCC|MTMR|MTND", rownames(sce_melanoma_patient_pure))
gene_corMat <- qlcMatrix::cosSparse(t((logcounts(sce_melanoma_patient_pure)[filter, ])), 
                                    t((logcounts(sce_melanoma_patient_pure)[!filter, ])))
gene_corMat_max <- apply(gene_corMat, 2, max, na.rm = TRUE)
exclude_genes <- c(rownames(sce_melanoma_patient_pure)[filter], 
                   names(gene_corMat_max)[gene_corMat_max > 0.8])
filter <- rownames(sce_melanoma_patient_pure) %in% exclude_genes

feature_selection <- function(exprsMat, batch, top_n = 2000, BPPARAM = SerialParam()) {
  combined.dec <- scran::modelGeneVar(exprsMat, block = batch, BPPARAM = BPPARAM)
  chosen.hvgs <- scran::getTopHVGs(combined.dec, n = top_n)
  return(chosen.hvgs)
}



chosen.hvg <- feature_selection(logcounts(sce_melanoma_patient_pure),
                                sce_melanoma_patient_pure$Sample,
                                top_n = 500)
chosen.hvg <- setdiff(chosen.hvg, exclude_genes)
length(chosen.hvg)
## [1] 483
set.seed(2022)
sce_melanoma_patient_pure <- runPCA(sce_melanoma_patient_pure, 
                                    ncomponents = 10, 
                                    #subset_row = setdiff(chosen.hvg, exclude_genes), 
                                    BPPARAM = SerialParam(), 
                                    BSPARAM = RandomParam(),
                                    exprs_values = "scMerge")

set.seed(2022)
sce_melanoma_patient_pure <- runUMAP(sce_melanoma_patient_pure, dimred = "PCA", min_dist = 0.3, verbose = TRUE)
df_toPlot <- moon::makeMoonDF(sce_melanoma_patient_pure)
df_toPlot$Sample_publish <- factor(df_toPlot$Sample_publish, levels = unique(df_toPlot$Sample_publish))
df_toPlot$Patient_publish <- factor(df_toPlot$Patient_publish, levels = unique(df_toPlot$Patient_publish))
g1 <- ggplot(df_toPlot[sample(nrow(df_toPlot)),], aes(x = UMAP1, y = UMAP2, color = Patient_publish)) +
  geom_scattermore(pointsize = 2) +
  theme(aspect.ratio = 1) +
  scale_color_manual(values = patient_color)


g1

ggsave(file.path(figures_dir, "UMAP_melanoma_reference.pdf"), width = 6, height = 5)

g1 + facet_wrap(~Patient_publish, ncol = 5)

ggsave(file.path(figures_dir, "UMAP_melanoma_reference_bySample.pdf"), width = 14, height = 5)

saveRDS(df_toPlot[, c("UMAP1", "UMAP2", "Patient_publish")], 
        file = file.path(sourcedata_dir,
                         "UMAP_melanoma_reference_bySample.rds"))
seurat_obj <- Seurat::CreateSeuratObject(counts = counts(sce_melanoma_patient_pure))
seurat_obj@assays$RNA@data <- logcounts(sce_melanoma_patient_pure)
for(i in names(melanoma_markers))  {
  seurat_obj <- Seurat::AddModuleScore(seurat_obj, list(intersect(rownames(seurat_obj), 
                                                                  melanoma_markers[[i]])),
                                       name = i)
}

seurat_module_scores <- seurat_obj@meta.data[, -c(1:3)]
colnames(seurat_module_scores) <- names(melanoma_markers)
g <- lapply(names(gene_scores)[c(1, 4, 2, 6)], function(x) {
  #df_toPlot$scores <- gene_scores[[x]][colnames(sce_melanoma_patient_pure)]
  df_toPlot$scores <- seurat_module_scores[colnames(sce_melanoma_patient_pure), x]
  ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, 
                        color = scale(scores))) +
    geom_scattermore(pointsize = 2) +
    theme(aspect.ratio = 1, legend.position = "bottom") +
    scale_color_gradient2(  low = "#2166AC",
                            mid = "white",
                            high = "#B2182B") +
    labs(color = x)
})

ggarrange(plotlist = g, ncol = 4, nrow = 1, align = "hv")

ggsave(file.path(figures_dir, "UMAP_melanoma_reference_byScore.pdf"), width = 12, height = 5)

saveRDS(cbind(df_toPlot[, c("UMAP1", "UMAP2")],
              apply(seurat_module_scores, 2, scale)),
        file = file.path(sourcedata_dir,
                         "UMAP_melanoma_reference_byScore.rds"))
g <- lapply(names(hallmark_scores), function(x) {
  df_toPlot$scores <- hallmark_scores[[x]][colnames(sce_melanoma_patient_pure)]
  ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, 
                        color = scores)) +
    geom_scattermore(pointsize = 4) +
    theme(aspect.ratio = 1, legend.position = "bottom") +
    scale_color_viridis_c() +
    labs(color = x)
})

ggarrange(plotlist = g, ncol = 3, nrow = 3, align = "hv")
## $`1`

## 
## $`2`

## 
## $`3`

## 
## $`4`

## 
## $`5`

## 
## $`6`

## 
## attr(,"class")
## [1] "list"      "ggarrange"

4 Clustering on DMSO cells (Coarse)

set.seed(2022)
sce_melanoma_patient_pure <- runPCA(sce_melanoma_patient_pure, ncomponents = 10,
                                    subset_row = hvg,
                                    BPPARAM = SerialParam(),
                                    BSPARAM = RandomParam(),
                                    exprs_values = "scMerge",
                                    name = "PCA_scMerge")

set.seed(2022)
g <- scran::buildSNNGraph(sce_melanoma_patient_pure, k = 30, use.dimred = "PCA_scMerge")
set.seed(2022)
sample_cluster_coarse <- igraph::cluster_louvain(g)$membership
sample_cluster_coarse <- factor(sample_cluster_coarse)
sample_cluster_coarse[sample_cluster_coarse %in% c(6, 7)] <- 6
sample_cluster_coarse[sample_cluster_coarse %in% c(1, 5, 4)] <- 1
sce_melanoma_patient_pure$sample_cluster_coarse <- sample_cluster_coarse
sce_melanoma_patient_pure$sample_cluster_coarse <- droplevels(sce_melanoma_patient_pure$sample_cluster_coarse)
sce_melanoma_patient_pure <- sce_melanoma_patient_pure[rowSums(counts(sce_melanoma_patient_pure)) != 0, ]
marker_cluster <- cellyx::doLimma(logcounts(sce_melanoma_patient_pure), sce_melanoma_patient_pure$sample_cluster_coarse)
## [1] 1
## [1] 2
## [1] 3
## [1] 4
marker_cluster <- lapply(marker_cluster, function(x) x[order(x$logFC, decreasing = TRUE), ])
gene_scores_mat <- do.call(cbind, lapply(gene_scores, function(x) x))
gene_scores_mat_agg <- apply(gene_scores_mat[colnames(sce_melanoma_patient_pure), ], 2, function(x) aggregate(x, list(sce_melanoma_patient_pure$sample_cluster_coarse), mean)$x)
rownames(gene_scores_mat_agg) <- levels(sce_melanoma_patient_pure$sample_cluster_coarse)



pheatmap(scale(gene_scores_mat_agg[, names(gene_scores)[c(1, 5, 4, 3, 2, 7, 6)]]), 
         cluster_cols = FALSE, 
         cluster_rows = FALSE,
         scale = "row", 
         border_color = NA,
         color = rdbu,
         main = "Average expression of gene list (raw scores)")

cluster_order <- c(4, 2, 1, 3)
pheatmap(gene_scores_mat_agg[cluster_order,
                             names(gene_scores)[c(1, 5, 4, 3, 2, 7, 6)]],
         cluster_cols = FALSE,
         cluster_rows = FALSE,
         # scale = "column",
         border_color = NA,
         color = reds,
         main = "Average expression of gene list (raw scores)")

pheatmap(scale(gene_scores_mat_agg[cluster_order,
                                   names(gene_scores)[c(1, 5, 4, 3, 2, 7, 6)]]),
         cluster_cols = FALSE,
         cluster_rows = FALSE,
         scale = "row",
         border_color = NA,
         color = rdbu,
         main = "Average expression of gene list (scaled scores)")

sample_cluster_factor <- factor(sce_melanoma_patient_pure$sample_cluster_coarse,
                                levels = cluster_order)
sample_cluster_annotation <- plyr::mapvalues(sce_melanoma_patient_pure$sample_cluster_coarse,
                                             from = rownames(gene_scores_mat_agg)[cluster_order],
                                             to = c(names(gene_scores)[c(1, 4, 2, 6)]))
sample_cluster_annotation <- factor(sample_cluster_annotation,
                                    levels =  c(names(gene_scores)[c(1, 4, 2, 6)]))
melanoma_color_coarse <- RColorBrewer::brewer.pal(8, "Dark2")[c(4, 6, 5, 3, 8)]
names(melanoma_color_coarse) <- c(names(gene_scores)[c(1, 4, 2, 6)], "unassigned")
sce_melanoma_patient_pure$sample_cluster_annotation <- sample_cluster_annotation
df_toPlot$sample_cluster_annotation <- sample_cluster_annotation


ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = sample_cluster_annotation)) +
  geom_scattermore(pointsize = 3) +
  theme(aspect.ratio = 1) +
  scale_color_manual(values = melanoma_color_coarse) +
  labs(color = " ")

ggsave(file.path(figures_dir, "UMAP_reference_celltype_coarse_celltype_annotation.pdf"), width = 6, height = 5)
saveRDS(df_toPlot[, c("UMAP1", "UMAP2", "sample_cluster_annotation")],
        file = file.path(sourcedata_dir, "UMAP_reference_celltype_coarse_celltype_annotation.rds"))



saveRDS(sample_cluster_annotation,
        file.path(sourcedata_dir, "DMSO_pure_sample_cluster_annotation_coarse.rds"))
g <- lapply(c("SOX10", "AXL", "MITF", "NGFR"), 
            function(x) {
              o <- order(logcounts(sce_melanoma_patient_pure)[x, ], decreasing = FALSE)
              ggplot(df_toPlot[o, ], aes(x = UMAP1, y = UMAP2, 
                                         color = logcounts(sce_melanoma_patient_pure)[x, o])) +
                geom_scattermore(pointsize = 5) +
                theme(aspect.ratio = 1) +
                scale_color_gradientn(colours = reds,
                                      limits = c(0, 4)) +
                labs(color = "", title = x) 
              
            })
ggarrange(plotlist = g, ncol = 2, nrow = 2, align = "hv")

ggsave(file.path(figures_dir, "tumor_reference_celltype_marker_example.pdf"), width = 12, height = 10)
gene_scores_mat_agg <- apply(seurat_module_scores[colnames(sce_melanoma_patient_pure), ], 2, function(x) aggregate(x, list(sce_melanoma_patient_pure$sample_cluster_annotation), mean)$x)
rownames(gene_scores_mat_agg) <- levels(sce_melanoma_patient_pure$sample_cluster_annotation)



pheatmap(scale(gene_scores_mat_agg[, names(gene_scores)[c(1, 5, 4, 3, 2, 7, 6)]]),
         cluster_cols = FALSE,
         cluster_rows = FALSE,
         scale = "row",
         border_color = NA,
         color = rdbu,
         main = "Average expression of gene list (scaled scores)",
         file = file.path(figures_dir, "reference_celltype_coarse_celltype_gene_scores_heatmap.pdf"),
         width = 5, height = 4)
saveRDS(sce_melanoma_patient_pure, file = file.path(sourcedata_dir, "sce_melanoma_pure.rds"))
scClassify_tumour_prediction <- readRDS("source_data/final/scClassify_tumour_prediction_coarse.rds")

sce_melanoma$scClassify_tumour_prediction <- scClassify_tumour_prediction[, 3]
sce_melanoma$scClassify_tumour_prediction[sce_melanoma$scClassify_tumour_prediction == "Neural crest-like_Undifferentiated"] <- "intermediate"
df_toPlot <- moon::makeMoonDF(sce_melanoma)


ggplot(df_toPlot[!df_toPlot$scClassify_tumour_prediction %in% c("intermediate", "unassigned"), ], 
       aes(x = factor(Sample_publish, levels = rev(sort(unique(df_toPlot$Sample_publish)))),  
           fill = factor(scClassify_tumour_prediction, levels = names(melanoma_color_coarse)))) +
  geom_bar(position = "fill") +
  theme(aspect.ratio = 0.4) +
  scale_fill_manual(values = melanoma_color_coarse) +
  coord_flip() +
  facet_grid(factor(Sample.type, levels = rev(names(table(df_toPlot$Sample.type))))~., scale = "free") +
  xlab("") +
  labs(fill = "") +
  ylab("Cell type proportion")

ggsave(file.path(figures_dir, "melanoma_celltype_proportion_bar_fill_rename.pdf"), width = 8, height = 4)



ggplot(df_toPlot[!df_toPlot$scClassify_tumour_prediction %in% c("intermediate", "unassigned"), ], 
       aes(x = factor(Sample_publish, levels = rev(sort(unique(df_toPlot$Sample_publish)))),  
           fill = factor(scClassify_tumour_prediction, levels = names(melanoma_color_coarse)))) +
  geom_bar() +
  theme(aspect.ratio = 0.4) +
  scale_fill_manual(values = melanoma_color_coarse) +
  coord_flip() +
  facet_grid(factor(Sample.type, levels = rev(names(table(df_toPlot$Sample.type))))~., scale = "free") +
  xlab("") +
  labs(fill = "") +
  ylab("Cell type proportion")

ggsave(file.path(figures_dir, "melanoma_celltype_proportion_bar_nonfill_rename.pdf"), width = 8, height = 4)
celltype_prop <- table(df_toPlot[!df_toPlot$scClassify_tumour_prediction %in% c("intermediate", "unassigned"), ]$scClassify_tumour_prediction,
                       df_toPlot[!df_toPlot$scClassify_tumour_prediction %in% c("intermediate", "unassigned"), ]$Sample)
celltype_prop <- apply(celltype_prop, 2, function(x) x/sum(x))
celltype_prop <- melt(celltype_prop)
colnames(celltype_prop) <- c("Celltype", "Sample", "Prop")
sample_meta <- unique(df_toPlot[, c("Sample", "Sample.type", "Condition", "Patient", 
                                    "Sample_publish", "Patient_publish")])
celltype_prop <- merge(celltype_prop, sample_meta, by = "Sample")
ggplot(celltype_prop, aes(x = Condition, y = Prop * 100, color = Sample.type)) +
  geom_line(aes(group = Patient), color = "black") +
  geom_point() +
  facet_wrap(~factor(Celltype, levels = names(melanoma_color_coarse)), nrow = 1, scale = "free") +
  scale_color_brewer(palette = "Dark2") +
  theme(aspect.ratio = 1, legend.position = "bottom") +
  ylab("Cell type proportion") 

ggsave(file.path(figures_dir, "tumor_celltype_proportion_dots.pdf"), width = 8, height = 3)
saveRDS(celltype_prop, 
        file = file.path(sourcedata_dir, "tumor_celltype_proportion_dots.rds"))
celltype_group <- df_toPlot[!df_toPlot$scClassify_tumour_prediction %in% c("intermediate", "unassigned"), ]$scClassify_tumour_prediction
celltype_group <- ifelse(celltype_group %in% c("Melanocytic", "Transitory"),
                         c("Melanocytic/Transitory"),
                         c("Neural crest-like/Undifferentiated"))
celltype_prop2 <- table(celltype_group,
                        df_toPlot[!df_toPlot$scClassify_tumour_prediction %in% c("intermediate", "unassigned"), ]$Sample)
celltype_prop2 <- apply(celltype_prop2, 2, function(x) x/sum(x))
celltype_prop2 <- melt(celltype_prop2)
colnames(celltype_prop2) <- c("Celltype", "Sample", "Prop")
sample_meta <- unique(df_toPlot[, c("Sample", "Sample.type", "Condition", "Patient",
                                    "Sample_publish", "Patient_publish")])
celltype_prop2 <- merge(celltype_prop2, sample_meta, by = "Sample")
ggplot(celltype_prop2, aes(x = Condition, y = Prop * 100, color = Sample.type)) +
  geom_line(aes(group = Patient), color = "black") +
  geom_point() +
  facet_wrap(~Celltype, nrow = 1, scale = "free") +
  scale_color_brewer(palette = "Dark2") +
  theme(aspect.ratio = 1, legend.position = "bottom") +
  ylab("Cell type proportion")

# ggsave(file.path(figures_dir, "tumor_celltype_proportion_dots.pdf"), width = 13, height = 4)
# saveRDS(celltype_prop,
#         file = file.path(sourcedata_dir, "tumor_celltype_proportion_dots.rds"))
ggplot(celltype_prop2, aes(x = Condition, y = Prop * 100, color = Sample.type)) +
  geom_line(aes(group = Patient), color = "black") +
  geom_point() +
  facet_grid(Celltype~Sample.type, scale = "free") +
  scale_color_brewer(palette = "Dark2") +
  theme(aspect.ratio = 1, legend.position = "bottom") +
  ylab("Cell type proportion")

lmer_res <- list()
for (i in levels(celltype_prop$Celltype)) {
  df_subset <- celltype_prop[celltype_prop$Celltype == i, ]
  
  lmer_res[[i]] <- lmerTest::lmer(Prop ~ Sample.type * Condition + (1|Patient), data = df_subset)
}


lapply(lmer_res, function(x) round(summary(x)$coefficients, 3))
## $Melanocytic
##                                        Estimate Std. Error    df t value
## (Intercept)                               0.000      0.158 8.003   0.003
## Sample.typeTreatment naïve                0.362      0.223 8.003   1.623
## ConditionDT                               0.004      0.004 8.000   1.045
## Sample.typeTreatment naïve:ConditionDT    0.008      0.006 8.000   1.477
##                                        Pr(>|t|)
## (Intercept)                               0.998
## Sample.typeTreatment naïve                0.143
## ConditionDT                               0.327
## Sample.typeTreatment naïve:ConditionDT    0.178
## 
## $`Neural crest-like`
##                                        Estimate Std. Error    df t value
## (Intercept)                               0.387      0.125 11.27   3.097
## Sample.typeTreatment naïve               -0.297      0.177 11.27  -1.681
## ConditionDT                               0.014      0.105  8.00   0.133
## Sample.typeTreatment naïve:ConditionDT    0.038      0.148  8.00   0.260
##                                        Pr(>|t|)
## (Intercept)                               0.010
## Sample.typeTreatment naïve                0.120
## ConditionDT                               0.897
## Sample.typeTreatment naïve:ConditionDT    0.802
## 
## $Transitory
##                                        Estimate Std. Error    df t value
## (Intercept)                               0.116      0.138 8.971   0.840
## Sample.typeTreatment naïve                0.140      0.195 8.971   0.718
## ConditionDT                               0.137      0.066 8.000   2.066
## Sample.typeTreatment naïve:ConditionDT   -0.110      0.094 8.000  -1.181
##                                        Pr(>|t|)
## (Intercept)                               0.423
## Sample.typeTreatment naïve                0.491
## ConditionDT                               0.073
## Sample.typeTreatment naïve:ConditionDT    0.272
## 
## $Undifferentiated
##                                        Estimate Std. Error    df t value
## (Intercept)                               0.497      0.147 9.622   3.384
## Sample.typeTreatment naïve               -0.205      0.208 9.622  -0.988
## ConditionDT                              -0.155      0.089 8.000  -1.730
## Sample.typeTreatment naïve:ConditionDT    0.063      0.127 8.000   0.502
##                                        Pr(>|t|)
## (Intercept)                               0.007
## Sample.typeTreatment naïve                0.347
## ConditionDT                               0.122
## Sample.typeTreatment naïve:ConditionDT    0.629
lmer_res <- list()
for (i in levels(celltype_prop$Celltype)) {
  df_subset <- celltype_prop[celltype_prop$Celltype == i, ]
  
  lmer_res[[i]] <- lmerTest::lmer(Prop ~ Sample.type + Condition + (1|Patient), data = df_subset)
}


lapply(lmer_res, function(x) round(summary(x)$coefficients, 3))
## $Melanocytic
##                            Estimate Std. Error    df t value Pr(>|t|)
## (Intercept)                  -0.002      0.158 8.001  -0.011    0.992
## Sample.typeTreatment naïve    0.366      0.223 8.000   1.642    0.139
## ConditionDT                   0.008      0.003 9.000   2.777    0.021
## 
## $`Neural crest-like`
##                            Estimate Std. Error    df t value Pr(>|t|)
## (Intercept)                   0.377      0.119 9.528   3.178    0.010
## Sample.typeTreatment naïve   -0.278      0.160 8.000  -1.732    0.121
## ConditionDT                   0.033      0.070 9.000   0.473    0.647
## 
## $Transitory
##                            Estimate Std. Error    df t value Pr(>|t|)
## (Intercept)                   0.144      0.136 8.509   1.055    0.320
## Sample.typeTreatment naïve    0.085      0.189 8.000   0.448    0.666
## ConditionDT                   0.081      0.048 9.000   1.704    0.123
## 
## $Undifferentiated
##                            Estimate Std. Error    df t value Pr(>|t|)
## (Intercept)                   0.481      0.143 8.752   3.362    0.009
## Sample.typeTreatment naïve   -0.173      0.198 8.000  -0.877    0.406
## ConditionDT                  -0.123      0.061 9.000  -2.032    0.073

5 Nazarian scores before and after

Nazarian_scores <- apply(logcounts(sce_melanoma)[Nazarian_markers, ], 2, mean_trim_low, trim = 0.1)

df_agg_naz <- aggregate(Nazarian_scores[colnames(sce_melanoma)],
                        list(df_toPlot$Sample_publish,
                             df_toPlot$scClassify_tumour_prediction),
                        mean)
colnames(df_agg_naz) <- c("Sample_publish", "Celltype", "Nazarian_scores")


meta_data_publish <- colData(sce_melanoma)[, c("Patient_publish",
                                               "Sample_publish",
                                               "Patient",
                                               "Sample",
                                               "Sample.type",
                                               "Condition")]
meta_data_publish <- unique(meta_data_publish)
meta_data_publish <- data.frame(meta_data_publish)


df_agg_naz <- merge(df_agg_naz, celltype_prop)

ggplot(df_agg_naz[df_agg_naz$Celltype %in% setdiff(names(melanoma_color_coarse), "unassigned") & df_agg_naz$Prop > 0.01, ], 
       aes(x = Condition, y = Nazarian_scores, color = Sample.type)) +
  geom_line(aes(group = Patient), color = "black") +
  geom_point() +
  facet_wrap(~factor(Celltype, levels = names(melanoma_color_coarse)), nrow = 1, scale = "free") +
  scale_color_brewer(palette = "Dark2") +
  theme(aspect.ratio = 1, legend.position = "bottom") +
  ylab("Cell type proportion") 

ggsave(file.path(figures_dir, "tumor_nazarian_scores_dots_bySampleType.pdf"), width = 8, height = 3)
saveRDS(df_agg_naz, 
        file = file.path(sourcedata_dir, "tumor_nazarian_scores_dots_bySampleType.rds"))

ggplot(df_agg_naz[df_agg_naz$Celltype %in% setdiff(names(melanoma_color_coarse), "unassigned") & df_agg_naz$Prop > 0.01, ], 
       aes(x = Condition, y = Nazarian_scores, color = Patient)) +
  geom_line(aes(group = Patient), color = "black") +
  geom_point() +
  facet_wrap(~factor(Celltype, levels = names(melanoma_color_coarse)), nrow = 1, scale = "free") +
  scale_color_tableau(palette = "Tableau 10") +
  theme(aspect.ratio = 1, legend.position = "bottom") +
  ylab("Cell type proportion") 

ggsave(file.path(figures_dir, "tumor_nazarian_scores_dots_byPatient.pdf"), width = 8, height = 3)
saveRDS(df_agg_naz, 
        file = file.path(sourcedata_dir, "tumor_nazarian_scores_dots_bySampleType.rds"))
saveRDS(sce_melanoma, file = file.path(sourcedata_dir, "sce_melanoma_tumor.rds"))

6 Session Info

sessionInfo()
## R version 4.4.0 (2024-04-24)
## Platform: x86_64-pc-linux-gnu
## Running under: Debian GNU/Linux 12 (bookworm)
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.21.so;  LAPACK version 3.11.0
## 
## locale:
##  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
##  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
##  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
## [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
## 
## time zone: Australia/Sydney
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] scMerge_1.16.0              HDF5Array_1.28.1           
##  [3] rhdf5_2.44.0                DelayedArray_0.26.7        
##  [5] inline_0.3.19               biomaRt_2.56.1             
##  [7] GO.db_3.17.0                clusterProfiler_4.8.2      
##  [9] org.Hs.eg.db_3.17.0         AnnotationDbi_1.62.2       
## [11] fgsea_1.26.0                SparseArray_1.4.8          
## [13] S4Arrays_1.4.1              abind_1.4-5                
## [15] Matrix_1.6-0                openxlsx_4.2.5.2           
## [17] BiocNeighbors_1.18.0        BiocSingular_1.16.0        
## [19] BiocParallel_1.34.2         ggrepel_0.9.3              
## [21] ggalluvial_0.12.5           Rtsne_0.16                 
## [23] rcartocolor_2.1.1           ggridges_0.5.4             
## [25] scran_1.28.2                scater_1.28.0              
## [27] scuttle_1.10.2              scattermore_1.2            
## [29] UpSetR_1.4.0                RColorBrewer_1.1-3         
## [31] gridExtra_2.3               reshape2_1.4.4             
## [33] pheatmap_1.0.12             moon_0.1.0                 
## [35] ggpubr_0.6.0                ggthemes_4.2.4             
## [37] ggplot2_3.4.3               dplyr_1.1.2                
## [39] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.2
## [41] Biobase_2.60.0              GenomicRanges_1.52.0       
## [43] GenomeInfoDb_1.36.1         IRanges_2.34.1             
## [45] S4Vectors_0.38.1            BiocGenerics_0.46.0        
## [47] MatrixGenerics_1.12.3       matrixStats_1.0.0          
## 
## loaded via a namespace (and not attached):
##   [1] igraph_1.5.0.1                ica_1.0-3                    
##   [3] plotly_4.10.2                 Formula_1.2-5                
##   [5] zlibbioc_1.46.0               tidyselect_1.2.0             
##   [7] bit_4.0.5                     lattice_0.22-6               
##   [9] M3Drop_1.26.0                 DoubletFinder_2.0.3          
##  [11] blob_1.2.4                    stringr_1.5.0                
##  [13] parallel_4.4.0                png_0.1-8                    
##  [15] ResidualMatrix_1.10.0         cli_3.6.1                    
##  [17] ggplotify_0.1.2               goftest_1.2-3                
##  [19] textshaping_0.3.6             kernlab_0.9-32               
##  [21] bluster_1.10.0                purrr_1.0.1                  
##  [23] densEstBayes_1.0-2.2          uwot_0.1.16                  
##  [25] shadowtext_0.1.2              curl_5.0.1                   
##  [27] mime_0.12                     evaluate_0.21                
##  [29] tidytree_0.4.5                leiden_0.4.3                 
##  [31] V8_4.3.3                      stringi_1.7.12               
##  [33] backports_1.4.1               lmerTest_3.1-3               
##  [35] XML_3.99-0.14                 httpuv_1.6.11                
##  [37] magrittr_2.0.3                rappdirs_0.3.3               
##  [39] splines_4.4.0                 ggraph_2.1.0                 
##  [41] sctransform_0.3.5             ggbeeswarm_0.7.2             
##  [43] DBI_1.1.3                     jquerylib_0.1.4              
##  [45] withr_2.5.0                   systemfonts_1.0.4            
##  [47] enrichplot_1.20.0             lmtest_0.9-40                
##  [49] bdsmatrix_1.3-7               tidygraph_1.2.3              
##  [51] BiocManager_1.30.23           htmlwidgets_1.6.2            
##  [53] segmented_1.6-4               labeling_0.4.2               
##  [55] cellyx_0.1.0                  cellranger_1.1.0             
##  [57] DEoptimR_1.1-1                mixtools_2.0.0               
##  [59] sparsesvd_0.2-2               reticulate_1.30              
##  [61] zoo_1.8-12                    XVector_0.40.0               
##  [63] knitr_1.43                    docopt_0.7.1                 
##  [65] fansi_1.0.4                   patchwork_1.1.2              
##  [67] caTools_1.18.2                grid_4.4.0                   
##  [69] data.table_1.14.8             ggtree_3.8.2                 
##  [71] ruv_0.9.7.1                   irlba_2.3.5.1                
##  [73] gridGraphics_0.5-1            ellipsis_0.3.2               
##  [75] lazyeval_0.2.2                yaml_2.3.7                   
##  [77] survival_3.5-3                BiocVersion_3.17.1           
##  [79] crayon_1.5.2                  RcppAnnoy_0.0.21             
##  [81] tidyr_1.3.0                   progressr_0.14.0             
##  [83] tweenr_2.0.2                  later_1.3.1                  
##  [85] codetools_0.2-20              base64enc_0.1-3              
##  [87] Seurat_4.3.0.1                KEGGREST_1.40.0              
##  [89] bbmle_1.0.25.1                startupmsg_0.9.6.1           
##  [91] limma_3.56.2                  filelock_1.0.2               
##  [93] foreign_0.8-86                pkgconfig_2.0.3              
##  [95] xml2_1.3.5                    sfsmisc_1.1-17               
##  [97] aplot_0.2.0                   spatstat.sparse_3.0-2        
##  [99] ape_5.7-1                     viridisLite_0.4.2            
## [101] xtable_1.8-4                  car_3.1-2                    
## [103] highr_0.10                    plyr_1.8.8                   
## [105] httr_1.4.6                    tools_4.4.0                  
## [107] globals_0.16.2                SeuratObject_4.1.3           
## [109] pkgbuild_1.4.2                beeswarm_0.4.0               
## [111] htmlTable_2.4.1               broom_1.0.5                  
## [113] checkmate_2.2.0               nlme_3.1-164                 
## [115] loo_2.7.0                     HDO.db_0.99.1                
## [117] dbplyr_2.3.3                  lme4_1.1-34                  
## [119] digest_0.6.33                 numDeriv_2016.8-1.1          
## [121] farver_2.1.1                  yulab.utils_0.0.7            
## [123] viridis_0.6.4                 cvTools_0.3.2                
## [125] rpart_4.1.23                  glue_1.6.2                   
## [127] cachem_1.0.8                  BiocFileCache_2.8.0          
## [129] polyclip_1.10-4               Hmisc_5.1-1                  
## [131] generics_0.1.3                proxyC_0.3.3                 
## [133] Biostrings_2.68.1             mvtnorm_1.2-2                
## [135] parallelly_1.36.0             statmod_1.5.0                
## [137] ragg_1.2.5                    ScaledMatrix_1.8.1           
## [139] minqa_1.2.5                   carData_3.0-5                
## [141] pbapply_1.7-2                 gson_0.1.0                   
## [143] dqrng_0.3.0                   utf8_1.2.3                   
## [145] graphlayouts_1.0.0            StanHeaders_2.32.6           
## [147] gtools_3.9.4                  readxl_1.4.3                 
## [149] ggsignif_0.6.4                shiny_1.7.5                  
## [151] GenomeInfoDbData_1.2.10       rhdf5filters_1.12.1          
## [153] RCurl_1.98-1.12               memoise_2.0.1                
## [155] rmarkdown_2.23                downloader_0.4               
## [157] scales_1.2.1                  future_1.33.0                
## [159] RANN_2.6.1                    spatstat.data_3.0-1          
## [161] rstudioapi_0.15.0             cluster_2.1.6                
## [163] QuickJSR_1.1.3                rstantools_2.4.0             
## [165] spatstat.utils_3.0-3          hms_1.1.3                    
## [167] fitdistrplus_1.1-11           munsell_0.5.0                
## [169] cowplot_1.1.1                 colorspace_2.1-0             
## [171] rlang_1.1.1                   DelayedMatrixStats_1.22.1    
## [173] sparseMatrixStats_1.12.2      ggforce_0.4.1                
## [175] mgcv_1.9-1                    xfun_0.39                    
## [177] reldist_1.7-2                 GOSemSim_2.26.1              
## [179] tibble_3.2.1                  interactiveDisplayBase_1.38.0
## [181] rstan_2.32.6                  treeio_1.24.3                
## [183] Rhdf5lib_1.22.0               bitops_1.0-7                 
## [185] ps_1.7.5                      promises_1.2.0.1             
## [187] scatterpie_0.2.1              RSQLite_2.3.1                
## [189] qvalue_2.32.0                 qlcMatrix_0.9.7              
## [191] compiler_4.4.0                prettyunits_1.1.1            
## [193] boot_1.3-30                   beachmat_2.16.0              
## [195] listenv_0.9.0                 Rcpp_1.0.11                  
## [197] edgeR_3.42.4                  AnnotationHub_3.8.0          
## [199] tensor_1.5                    MASS_7.3-60.0.1              
## [201] progress_1.2.2                spatstat.random_3.1-5        
## [203] R6_2.5.1                      fastmap_1.1.1                
## [205] fastmatch_1.1-4               rstatix_0.7.2                
## [207] vipor_0.4.5                   distr_2.9.3                  
## [209] ROCR_1.0-11                   rsvd_1.0.5                   
## [211] nnet_7.3-19                   gtable_0.3.3                 
## [213] KernSmooth_2.23-22            miniUI_0.1.1.1               
## [215] deldir_1.0-9                  htmltools_0.5.8.1            
## [217] RcppParallel_5.1.7            bit64_4.0.5                  
## [219] spatstat.explore_3.2-1        lifecycle_1.0.3              
## [221] zip_2.3.0                     processx_3.8.2               
## [223] nloptr_2.0.3                  callr_3.7.3                  
## [225] sass_0.4.9                    vctrs_0.6.3                  
## [227] slam_0.1-50                   spatstat.geom_3.2-4          
## [229] robustbase_0.99-0             DOSE_3.26.1                  
## [231] ggfun_0.1.2                   sp_2.0-0                     
## [233] future.apply_1.11.0           bslib_0.7.0                  
## [235] pillar_1.9.0                  batchelor_1.16.0             
## [237] gplots_3.1.3                  metapod_1.8.0                
## [239] locfit_1.5-9.8                jsonlite_1.8.7